Prior to running this code:

# Set Working Directory before starting

# Immcantation
suppressPackageStartupMessages(library(alakazam))
suppressPackageStartupMessages(library(shazam))
suppressPackageStartupMessages(library(tigger))
suppressPackageStartupMessages(library(dowser))

suppressPackageStartupMessages(library(glmGamPoi))
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(dittoSeq))
suppressPackageStartupMessages(library(reshape2))
suppressPackageStartupMessages(library(kableExtra))
suppressPackageStartupMessages(library(devtools))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(EnhancedVolcano))
suppressPackageStartupMessages(library(RColorBrewer))
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(cowplot))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(impactSingleCellToolkit))

source("resources/functions_workshop.R")

1. Load Processed Objects

load("objects/processed_objects.RData")

# Prepare objects for DGE
tcell_obj <- PrepSCTFindMarkers(tcell_obj)
## Minimum UMI unchanged. Skipping re-correction.
bcell_obj <- PrepSCTFindMarkers(bcell_obj)
## Found 4 SCT models. Recorrecting SCT counts using minimum median counts: 2241
## Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

2. Subset Disease Objects

a. subset B and T cells

  • Combine Moderate and High into Expanded to set up comparing Expanded vs Non-expanded clonotypes
  • There are far more T cells compared to B cells, which allows us to do a CSF only analysis for T cells, but not B cells
# T cell CSF object
tcell_obj_csf <- tcell_obj[,names(tcell_obj$status[tcell_obj$status %in% "Disease"])]
tcell_obj_csf <- tcell_obj_csf[,names(tcell_obj_csf$compartment[tcell_obj_csf$compartment %in% "CSF"])]
tcell_obj_csf$expansion_category[tcell_obj_csf$expansion_category %in% c("High","Moderate")] <- "Expanded"
tcell_obj_csf$expansion.subtype <- paste(tcell_obj_csf$expansion_category,tcell_obj_csf$sub.celltype.annot)


# B cell object
bcell_obj_csfpb <- bcell_obj
bcell_obj_csfpb$expansion_category[bcell_obj_csfpb$expansion_category %in% c("High","Moderate")] <- "Expanded"
bcell_obj_csfpb$expansion.subtype <- paste(bcell_obj_csfpb$expansion_category,bcell_obj_csfpb$sub.celltype.annot)

b. B cell gene usage (heatmap)

  • With RNA-Seq alone, it is hard to separate out clonotypes

V genes

  • There is some indication that cells with the same V genes group together, but the patterns are hard to distinguish
genes_bcells             <- row.names(bcell_obj_csfpb)
vgenes_bcells            <- sort(genes_bcells[grep("IGHV",genes_bcells)])

h1 <- dittoHeatmap(bcell_obj_csfpb, genes = vgenes_bcells,
        annot.by = c("sub.celltype.annot","compartment","status"),
        scaled.to.max = TRUE,
        show_colnames = FALSE,
        show_rownames = TRUE,
        cluster_cols = TRUE,
        main = "V gene usage B cells")

Other Ig genes

  • Elevated IgM expression among B cells
  • There is some level of grouping of CSF cells with IgA1 and IgG
genes_bcells             <- row.names(bcell_obj_csfpb)
other_ig_genes_bcells    <- sort(genes_bcells[grep("IGHA|IGHG|IGHM|IGHD",genes_bcells)])

h2 <- dittoHeatmap(bcell_obj_csfpb, genes = other_ig_genes_bcells,
        annot.by = c("sub.celltype.annot","compartment","status"),
        scaled.to.max = TRUE,
        show_colnames = FALSE,
        show_rownames = TRUE,
        cluster_cols = FALSE,
        main = "Ig gene usage B cells")

3. Differential Gene Expression (DGE)

a. B cells - CSF vs. PB

TOP_N <- 100

# B cells
Idents(bcell_obj) <- bcell_obj$compartment
levels(bcell_obj) <- c("CSF","PB")

dge_compartment_bcells_all <- FindAllMarkers(bcell_obj,logfc.threshold = 0.0,test.use = "wilcox")
## Calculating cluster CSF
## Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
## ℹ Please use `rlang::check_installed()` instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Calculating cluster PB
dge_compartment_bcells <- dge_compartment_bcells_all[dge_compartment_bcells_all$p_val_adj < 0.05,]
top_genes_bcells       <- dge_compartment_bcells %>% group_by(cluster) %>% top_n(TOP_N, avg_log2FC)

Heatmaps - B cells

Idents(bcell_obj) <- bcell_obj$compartment
levels(bcell_obj) <- c("CSF","PB")

p1 <- dittoHeatmap(bcell_obj, genes = top_genes_bcells$gene,
        annot.by = c("compartment","status", "sub.celltype.annot"),
        scaled.to.max = TRUE,
        show_colnames = FALSE,
        show_rownames = FALSE,
        cluster_cols = FALSE,
        main = "CSF vs. PB B cells")

# CSF vs PB
p1.2 <- DoHeatmap(bcell_obj,features = top_genes_bcells$gene)

p1

p1.2

Volcano plot CSF DGE

dge_compartment_bcells_csf <- dge_compartment_bcells_all[dge_compartment_bcells_all$cluster %in% "CSF",]

plot_volcano<- EnhancedVolcano(toptable = dge_compartment_bcells_csf,
    lab = dge_compartment_bcells_csf$gene,
    x = 'avg_log2FC',
    y = 'p_val_adj',
    ylim = c(0,-log2(10e-4)),
    xlim = c(-3.5,3.5),
    title = 'DGE Bcells CSF vs PB',
    ylab = "-Log2(P)",
    xlab = "avg_log2FC",
    pCutoff = 0.05,
    FCcutoff = 1.0,
    pointSize = 1,
    labSize = 3)

plot_volcano

4. T cell DGE - Expanded vs Non-Expanded Clones

a. run DGE on T cells Expanded vs. Non-expanded (CSF)

TOP_N <- 12
Idents(tcell_obj_csf) <- tcell_obj_csf$expansion.subtype
levels(tcell_obj_csf) <- c("Expanded CD8 T cells","Non-expanded CD8 T cells","Expanded CD4 T cells","Non-expanded CD4 T cells")

dge_tcell_csf   <- FindAllMarkers(tcell_obj_csf,logfc.threshold = 0.0,test.use = "wilcox")
## Calculating cluster Expanded CD8 T cells
## Calculating cluster Non-expanded CD8 T cells
## Calculating cluster Expanded CD4 T cells
## Calculating cluster Non-expanded CD4 T cells
dge_tcell_csf   <- dge_tcell_csf[dge_tcell_csf$p_val_adj < 0.05,]
top_genes_tcells_csf <- dge_tcell_csf %>% group_by(cluster) %>% top_n(TOP_N, avg_log2FC)

b. Dot plot of top expanded and non-expanded genes

# fig.height=11 | fig.height=7 for workshop

DotPlot(object = tcell_obj_csf,features = unique(top_genes_tcells_csf$gene)) + theme(axis.text.x = element_text(angle = 90, hjust=1)) + ggtitle("T cells CSF Expanded vs. Non-Expanded") + coord_flip()

c. Dot plot of top expanded genes

# fig.height=11 | fig.height=7 for workshop

# table(dge_tcell_csf$cluster)
top_expanded_tcells <- dge_tcell_csf[dge_tcell_csf$cluster %in% c("Expanded CD8 T cells","Expanded CD4 T cells"),]

DotPlot(object = tcell_obj_csf,features = unique(top_expanded_tcells$gene)) + theme(axis.text.x = element_text(angle = 90, hjust=1)) + ggtitle("T cells CSF Top Expanded Genes") + coord_flip()

5. B cell DGE - Expanded vs Non-Expanded Clonotypes

a. run DGE on B cells Expanded vs. Non-expanded (CSF and PB)

TOP_N <- 10
Idents(bcell_obj_csfpb) <- bcell_obj_csfpb$expansion.subtype
complete_order          <- c("Expanded Plasmablast","Non-expanded Plasmablast",
                             "Expanded B memory","Non-expanded B memory",
                             "Expanded B intermediate","Non-expanded B intermediate",
                             "Expanded B naive","Non-expanded B naive")
complete_order_sub <- complete_order[complete_order %in% unique(bcell_obj_csfpb$expansion.subtype)]
levels(bcell_obj_csfpb) <- complete_order_sub

dge_bcell_csfpb   <- FindAllMarkers(bcell_obj_csfpb,logfc.threshold = 0.0,test.use = "wilcox")
## Calculating cluster Expanded Plasmablast
## Calculating cluster Non-expanded Plasmablast
## Calculating cluster Expanded B memory
## Calculating cluster Non-expanded B memory
## Calculating cluster Expanded B intermediate
## Calculating cluster Non-expanded B intermediate
## Calculating cluster Expanded B naive
## Calculating cluster Non-expanded B naive
dge_bcell_csfpb   <- dge_bcell_csfpb[dge_bcell_csfpb$p_val_adj < 0.05,]
top_genes_bcells_csfpb <- dge_bcell_csfpb %>% group_by(cluster) %>% top_n(TOP_N, avg_log2FC)

b. Dot plot of top genes

  • Explore the top DE genes for different expanded populatations
# fig.height=17 | fig.height=9 for workshop

DotPlot(object = bcell_obj_csfpb,features = unique(top_genes_bcells_csfpb$gene)) + theme(axis.text.x = element_text(angle = 90, hjust=1)) + ggtitle("B cells CSF Expanded vs. Non-Expanded") + coord_flip()

c. Dot plot of DE genes for one expanded B cell subtype

# table(dge_bcell_csfpb$cluster)
top_expanded <- dge_bcell_csfpb[dge_bcell_csfpb$cluster %in% "Expanded B memory",]

DotPlot(object = bcell_obj_csfpb,features = unique(top_expanded$gene)) + theme(axis.text.x = element_text(angle = 90, hjust=1)) + ggtitle("B cells CSF Expanded vs. Non-Expanded") + coord_flip()

4. Pathway analysis

suppressPackageStartupMessages(library(ReactomePA))
suppressPackageStartupMessages(library(org.Hs.eg.db))
suppressPackageStartupMessages(library(igraph))
suppressPackageStartupMessages(library(enrichplot))
suppressPackageStartupMessages(library(ggnewscale))

suppressPackageStartupMessages(library(DOSE))
suppressPackageStartupMessages(library(clusterProfiler))
suppressPackageStartupMessages(library(ggupset))

a. B cells Memory

# Most of the expression should be in the target cluster
# - pct.1 = proportion of expression of the gene within the target cluster group
# - pct.2 = proportion of expression of the gene within everything outside the target cluster
target_genes <- dge_bcell_csfpb[dge_bcell_csfpb$cluster %in% c("Expanded B memory","Expanded B intermediate","Expanded B naive"),]

# Convert gene names to Entrez IDs
de_genes <- mapIds(org.Hs.eg.db, target_genes$gene, 'ENTREZID', 'SYMBOL')
## 'select()' returned 1:1 mapping between keys and columns
de_genes <- de_genes[!is.na(de_genes)]
de <- as.character(de_genes)

# Create FC and P-adj-value vectors for enrihed genes
de_fc_df <- target_genes[target_genes$gene %in% names(de_genes),]
entrez_col <- c()
for (g in names(de_genes)) {
  egene <- as.character(de_genes[g])
  entrez_col <- c(entrez_col,egene)
}
de_fc_df$entrez_id <- entrez_col

de_fc_list        <- de_fc_df$avg_log2FC
names(de_fc_list) <- de_fc_df$entrez_id

# P-value vec
de_pval_list        <- de_fc_df$p_val_adj
names(de_pval_list) <- de_fc_df$entrez_id

# Pathway Enrichment
bcell_pathways       <- enrichPathway(gene=de,pvalueCutoff=0.05, readable=T)
bcell_pathways_df    <- as.data.frame(bcell_pathways)

# Top enriched pathways and the gene count within each pathway
barplot(bcell_pathways, showCategory = 10)

Network of Pathways and genes (DisGeNET database)

edo <- enrichDGN(as.list(de))
de_fc_list <- sort(de_fc_list,decreasing = T)

## convert gene ID to Symbol
edox <- setReadable(edo, 'org.Hs.eg.db', 'ENTREZID')

## Network categorySize can be scaled by 'pvalue' or 'geneNum' 
p1_bcell <- cnetplot(edox, categorySize="pvalue", foldChange=de_fc_list)

## Heatmap
p2_bcell <- heatplot(edox, foldChange=de_fc_list, showCategory=14)

plot_grid(p1_bcell,p2_bcell,ncol = 2)

b. T cells CSF CD8

# - pct.1 = proportion of expression of the gene within the target cluster group
# - pct.2 = proportion of expression of the gene within everything outside the target cluster
target_genes <- dge_tcell_csf[dge_tcell_csf$cluster %in% c("Expanded CD8 T cells","Non-expanded CD8 T cells"),]

# Convert gene names to Entrez IDs
de_genes <- mapIds(org.Hs.eg.db, target_genes$gene, 'ENTREZID', 'SYMBOL')
## 'select()' returned 1:1 mapping between keys and columns
de_genes <- de_genes[!is.na(de_genes)]
de <- as.character(de_genes)

# Create FC and P-adj-value vectors for enrihed genes
de_fc_df <- target_genes[target_genes$gene %in% names(de_genes),]
entrez_col <- c()
for (g in names(de_genes)) {
  egene <- as.character(de_genes[g])
  entrez_col <- c(entrez_col,egene)
}
de_fc_df$entrez_id <- entrez_col

de_fc_list        <- de_fc_df$avg_log2FC
names(de_fc_list) <- de_fc_df$entrez_id

# P-value vec
de_pval_list        <- de_fc_df$p_val_adj
names(de_pval_list) <- de_fc_df$entrez_id

tcell_pathways <- enrichPathway(gene=de,pvalueCutoff=0.05, readable=T)
tcell_pathways_df <- as.data.frame(tcell_pathways)

barplot(tcell_pathways, showCategory=10)

Network of genes shared between pathways

tcell_pathways_net <- pairwise_termsim(tcell_pathways,showCategory = 25)
emapplot(tcell_pathways_net, showCategory = 25,layout = "kk")

Network of Pathways and genes (DisGeNET database)

edo <- enrichDGN(as.list(de))
de_fc_list <- sort(de_fc_list,decreasing = T)

## convert gene ID to Symbol
edox <- setReadable(edo, 'org.Hs.eg.db', 'ENTREZID')

## Network categorySize can be scaled by 'pvalue' or 'geneNum' 
p1_tcell <- cnetplot(edox, categorySize="pvalue", foldChange=de_fc_list)

## Heatmap
p2_tcell <- heatplot(edox, foldChange=de_fc_list, showCategory=20)

plot_grid(p1_tcell,p2_tcell,ncol = 2)